\[\\[.4in]\]
library(dplyr)
library(ggplot2)
library(lubridate)
library(MMWRweek)
library(readr)
library(rlang)
library(magrittr)
library(aws.s3)
library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(plotly)
library(readr)
library(stringr)
library(tidyr)
suppressPackageStartupMessages(source(here::here("R", "load_all.R")))
forecast_date <- as.Date("2024-11-20")
epi_data <- readr::read_csv("https://data.cdc.gov/resource/ua7e-t2fy.csv?$limit=20000&$select=weekendingdate,jurisdiction,totalconfflunewadm")
epi_data <- epi_data %>%
mutate(
epiweek = epiweek(weekendingdate),
epiyear = epiyear(weekendingdate)
) %>%
left_join(
(.) %>%
distinct(epiweek, epiyear) %>%
mutate(
season = convert_epiweek_to_season(epiyear, epiweek),
season_week = convert_epiweek_to_season_week(epiyear, epiweek)
),
by = c("epiweek", "epiyear")
)
epi_data <- epi_data %>%
mutate(time_value = as.Date(weekendingdate), geo_value = tolower(jurisdiction), nhsn = totalconfflunewadm) %>%
select(-weekendingdate, -jurisdiction, -totalconfflunewadm)
ahead <- 0
window_size <- 3
recent_window <- 3
geos_to_plot <- c("mn", "ca", "pa", "usa", "ri") #epi_data %>% pull(geo_value) %>% unique() %>% setdiff(c("as", "mp", "vi")) #
all_geos_to_plot <- epi_data %>% pull(geo_value) %>% unique() %>% setdiff(c("as", "mp", "vi"))
plot_quantiles <- c(0.6, 0.75, 0.95, 0.99)
quantile_alphas <- c(0.9, 0.6, 0.4, 0.3)
min_plot_quantiles <- c(0.75, 0.90)
min_quantile_alphas <- c(0.9, 0.3)
The model used is specified here. For flu, the data for the 2020/21 and 2021/22 seasons are dropped, while for covid we’ve done versions both including and excluding.
quantile_basic uses base R’s quantile’s, each geo
separately, using a window of 7 weeks (3 before and after the target
date), and the 3 weeks before the last week of dataepipred quantile is more or less the same, but using
the quantile extrapolation in epipredict after fitting the quantiles
.25, .5, and .75. The main difference is that the tails are more extreme
and can be outside the range of previously seen values for that
location.geo aggregated is a global fit, that first puts
everything on a rate scale, and then does a single global forecast, and
then returns to the original scales. It is just using base R
quantiles.The intervals here are the 75th and 90th quantiles (more quantiles uses 60th, 75, 95, and 99th). Some notes:
quantile_basic’s upper 99th quantile is frequently
indistinguishable from it’s 95th quantile.quantile_basic <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead)) %>%
bind_rows() %>%
mutate(geo_type = "state", forecaster = "quantile basic")
quantile_7 <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, quant_type = 7)) %>%
bind_rows() %>%
mutate(geo_type = "state", forecaster = "quantile_7")
epipred_extrapolation <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, quantile_method = "epipredict")) %>% bind_rows() %>%
mutate(geo_type = "state", forecaster = "epipred quantile")
geo_aggregate <- lapply(-1:3, \(ahead) climatological_model(epi_data, forecast_date, ahead, geo_agg = TRUE)) %>% bind_rows() %>%
mutate(geo_type = "state", forecaster = "geo aggregated")
all_results <- bind_rows(
geo_aggregate,
quantile_basic,
epipred_extrapolation
)
small_truth_data <- epi_data %>%
mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
filter(geo_value %in% geos_to_plot, time_value > "2022-09-01")
the_plot <-
all_results %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
small_truth_data <- epi_data %>%
mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
filter(geo_value %in% geos_to_plot, time_value > "2022-09-01")
the_plot <-
all_results %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
full_truth_data <- epi_data %>%
mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
filter(geo_value %in% all_geos_to_plot, time_value > "2022-09-01")
the_plot <-
all_results %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1700) %>%
layout(hoverlabel = list(bgcolor = "white"))
full_truth_data <- epi_data %>%
mutate(value = nhsn, target_end_date = time_value, forecaster = "nhsn") %>%
filter(geo_value %in% all_geos_to_plot, time_value > "2022-09-01")
the_plot <-
all_results %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1700) %>%
layout(hoverlabel = list(bgcolor = "white"))
These are * the mean of the geo aggregated and the
epipredict quantile above * the mean of the geo aggregated
and the quantile basic above * the mean of all 3
the_plot <- mean_climatological %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
Since the dynamics are exponential, averaging on the log scale is probably a better method. That way, if the national is predicting something an order of magnitude higher that won’t swamp the local information. This is the same set using the geomean instead of the arithmetic mean.
"all three"the mean of all 3"epipred quantile"the mean of the geo aggregated and
the quantile basic above"quantile_basic"the mean of the geo aggregated and the
epipredict quantile abovethe_plot <- mean_climatological %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = small_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 1500, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = min_plot_quantiles, alphas = min_quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))
the_plot <- mean_climatological %>%
filter(geo_value %in% all_geos_to_plot) %>%
plot_forecasts(forecast_date, geo_type = "state", truth_data = full_truth_data, quantiles = plot_quantiles, alphas = quantile_alphas)
ggplotly(the_plot, tooltip = "text", height = 5000, width = 1600) %>%
layout(hoverlabel = list(bgcolor = "white"))